

************************************************************************************************
************************************************************************************************
*This .do file creates the results relating to the RD design for math (2b replicates the same .do file effectively but replaces math with English)

*Input:
*1. "student_mb_2009to2017.dta" (created by "Data_get_2009_17.do")

*Results created by .do file (in order):
*1. Columns (1) and (2) of Table A.1
*2. Figure C.1(a)
*3. Figures 2(a) and 2(b)
*4. Table 2 Columns (1) and (2)
*5. Table A.2 Columns (1) and (2)
*6. Table C.1 Columns (1), (2) and (3)
*7. Figure C.4(a)
*8. Figure C.2
************************************************************************************************
************************************************************************************************

* ---- Load the student panel and prepare the sample used for the math RD ----
clear all
use "/data_analysis/NC_RD_Retake/student_mb_2009to2017.dta"

* Post-2012 rows are not needed (the lead test scores have already been created),
* and grade 8 is removed because it has no lead test scores
drop if year > 2012 | grade == 8

* Blank out scores that come from the extend tests (noted by the authors as the disabled tests),
* then discard the extend flags together with the lagged scores
replace smathscal = . if nc_extend_math == 1
replace sreadscal = . if nc_extend_read == 1
drop nc_extend_math nc_extend_math_retake nc_extend_read nc_extend_read_retake lag_smathscal lag_sreadscal

* Retain a row only when at least one subject has both a current and a subsequent (lead) score
keep if (smathscal != . & Fsmathscal != .) | (sreadscal != . & Fsreadscal != .)

* Treatment indicator: equals 1 when the math running variable is at or below zero
generate treat = (running_math <= 0)
quietly compress

* Flag rows with a missing value of read, then fill those rows with the grade-specific mean of read
generate read_missing = (read == .)
forvalues g = 3/7 {
    quietly summarize read if grade == `g'
    replace read = r(mean) if read_missing == 1 & grade == `g'
}

* Missing values of the two repetition covariates are set to zero
foreach flag of varlist repeat_next_year repeating {
    replace `flag' = 0 if `flag' == .
}

* Covariate lists referenced by the regressions further down this do-file
local demo_controls "i.sex i.lep i.swd i.ethnic i.eds i.aig_math i.aig_read i.repeating"
local test_controls "i.grade#c.read i.grade#c.math i.read_missing i.read_missing#i.grade#c.math"

**************************************************************
*Column (1) of Table A.1: Full sample: 2008-09 to 2011-12 for grades 3-7
**************************************************************
*foreach var of varlist smathscal sreadscal Fsmathscal{
*su `var'
*}
*gen aig=(aig_math==1 | aig_read==1)
*foreach var of varlist ethnic eds lep swd aig repeating{
*tab `var'
*}
*drop aig
**************************************************************
**************************************************************
**************************************************************

* Math sample restrictions: a row must carry both a current and a subsequent math score
drop if smathscal == . | Fsmathscal == .

* Constant variable passed to absorb() in the reghdfe calls that use no fixed effects
generate dum = 1

**************************************************************
*Column (2) of Table A.1: RD sample: 2008-09 to 2011-12 for grades 3-7 within 5 units of threshold
**************************************************************
*keep if running_math>=-5 & running_math<=5
*foreach var of varlist smathscal sreadscal Fsmathscal{
*su `var'
*}
*gen aig=(aig_math==1 | aig_read==1)
*foreach var of varlist ethnic eds lep swd aig repeating{
*tab `var'
*}
*drop aig
**************************************************************
**************************************************************
**************************************************************

**************************************************************
*******************Figure C.1*********************************
**************************************************************
*histogram running_math if running_math>=-10 & running_math<=10 & Fsmathscal!=., discrete width(1) ylabel(0(0.03)0.09) xlabel(-10(2)10) xtitle("Distance to Retest Threshold (Math)") addplot(pci 0 0 .09 0) graphregion(color(white)) bgcolor(white)
*Frandsen test:
*su running_math if running_math<10 & running_math>-10
*Choosing k: have 8 support points within 1 s.d. This corresponds to k=0.032 according to Frandsen's rule of thumb.
*rddisttestk running_math, threshold(0) k(.032)
**************************************************************
**************************************************************
**************************************************************

**************************************************************
*Figure 2: First and Second-Stage Figures
**************************************************************

**************************************************************
*******************Figure 2(a)********************************
**************************************************************
* Express the retest indicator in percent for the (currently commented-out) first-stage plot
replace retested_m = 100 * retested_m
*cmogram retested_m running_math if running_math>=-5 & running_math<=5 & Fsmathscal!=. & smathscal!=., scatter histopts(width(0.02)) cutpoint(0) lfit graphopts(xline(0) ylabel(0(20)100) ytitle("Percent Taking Math Retest") xlabel(-5(1)5) xtitle("Distance to Retest Threshold (Math)") graphregion(color(white)) bgcolor(white))
* Undo the percent scaling so later commands see the original units
replace retested_m = 0.01 * retested_m
*reghdfe retested_m treat running_math c.running_math#i.treat if running_math>=-5 & running_math<=5 & Fsmathscal!=. & smathscal!=., absorb(dum) cluster(schoolid mastid) 

**************************************************************
*******************Figure 2(b)********************************
**************************************************************
*cmogram Fsmathscal running_math if running_math>=-5 & running_math<=5, scatter histopts(width(0.02)) cutpoint(0) lfit graphopts(xline(0) ylabel(-1(0.2)-0.2) ytitle("Standardized Math Score in t+1 ({&sigma})") xlabel(-5(1)5) xtitle("Distance to Retest Threshold (Math)") graphregion(color(white)) bgcolor(white))
*reghdfe Fsmathscal treat running_math c.running_math#i.treat if running_math>=-5 & running_math<=5 & Fsmathscal!=. & smathscal!=., absorb(dum) cluster(schoolid mastid) 



**************************************************************
**************************************************************
*Table 2 Columns (1) and (2): RD regressions
**************************************************************
**************************************************************
* Grade-by-year cell identifier, absorbed as a fixed effect in the covariate-adjusted models
egen gradeyear = group(grade year)

* For each horizon (t+1, t+2, t+3) run the linear RD within 5 units of the threshold:
*   first without covariates (Column (1)), then with covariates (Column (2))
foreach outcome in Fsmathscal F2smathscal F3smathscal {
    reghdfe `outcome' treat running_math c.running_math#i.treat ///
        if inrange(running_math, -5, 5), absorb(dum) cluster(schoolid mastid)
    reghdfe `outcome' treat running_math c.running_math#i.treat `test_controls' `demo_controls' ///
        if inrange(running_math, -5, 5), absorb(gradeyear) cluster(schoolid mastid)
}


**************************************************************
**************************************************************
*Table A.2 Columns (1) and (2): RD regressions on Other Subject Scores
**************************************************************
**************************************************************

* Same linear RD on the math threshold, with reading scores at t+1, t+2 and t+3 as outcomes:
*   first without covariates (Column (1)), then with covariates (Column (2))
foreach outcome in Fsreadscal F2sreadscal F3sreadscal {
    reghdfe `outcome' treat running_math c.running_math#i.treat ///
        if inrange(running_math, -5, 5), absorb(dum) cluster(schoolid mastid)
    reghdfe `outcome' treat running_math c.running_math#i.treat `test_controls' `demo_controls' ///
        if inrange(running_math, -5, 5), absorb(gradeyear) cluster(schoolid mastid)
}


**************************************************************
**************************************************************
**************************************************************

**************************************************************
*Appendix Table C.1: Functional Form
**************************************************************
* Quadratic specification (Column (2)): running variable, its square, and both interacted with treat
generate inter = running_math * treat
generate running2 = running_math * running_math
generate inter2 = running_math * running_math * treat

* Math outcomes at t+1, t+2, t+3 first, followed by the other-subject (reading) outcomes
foreach outcome in Fsmathscal F2smathscal F3smathscal Fsreadscal F2sreadscal F3sreadscal {
    reghdfe `outcome' treat running_math inter running2 inter2 `test_controls' `demo_controls' ///
        if inrange(running_math, -5, 5), absorb(gradeyear) cluster(schoolid mastid)
}


* Triangular weighting (Column (3)): the weight falls linearly from 5 at the threshold to 0 at a distance of 5
generate weight = max(0, 5 - abs(running_math))

* Math outcomes at t+1, t+2, t+3 first, followed by the other-subject (reading) outcomes
foreach outcome in Fsmathscal F2smathscal F3smathscal Fsreadscal F2sreadscal F3sreadscal {
    reghdfe `outcome' treat running_math inter `test_controls' `demo_controls' [aw=weight] ///
        if inrange(running_math, -5, 5), absorb(gradeyear) cluster(schoolid mastid)
}


**************************************************************
*Appendix Figure: Bandwidth Robustness
**************************************************************
* Bandwidth robustness: re-estimate the covariate-adjusted RD on Fsmathscal for bandwidths 2 to 10
* and collect one row per bandwidth holding (estimate, variance, bandwidth).
* The matrix is seeded with a row of missing values; that row becomes observation 1 after svmat
* and contributes nothing to the plot.
matrix A = (., ., .)
forvalues b = 2/10 {
    display "Bandwidth is `b'"
    quietly reghdfe Fsmathscal treat running_math c.running_math#i.treat `test_controls' `demo_controls' ///
        if running_math>=-`b' & running_math<=`b', absorb(gradeyear) cluster(schoolid mastid)
    * Read the treat coefficient and its variance by name rather than by position in e(b) and e(V),
    * so the stored values stay correct even if the order of regressors is changed
    matrix A = A \ (_b[treat], _se[treat]^2, `b')
}

* Move the collected results into variables est, var and bandwidth
svmat A
rename A1 est
rename A2 var
rename A3 bandwidth

* Confidence limits built as estimate +/- 1.96 standard errors
generate lcl = est - 1.96*((var)^0.5)
generate ucl = est + 1.96*((var)^0.5)

* Plot; the y-axis title now closes its outer parenthesis
twoway (connected est bandwidth) (rcap lcl ucl bandwidth), xtitle("Bandwidth") xlabel(2(1)10) ylabel(0(0.01)0.05) ytitle("Estimated Coefficient (Math Scores ({&sigma}))") yline(0) xline(5) graphregion(color(white)) bgcolor(white)




**************************************************************
*Appendix: RD Covariates
**************************************************************
* Build 0/1 indicators from the ethnic code: asian (1), black (2), hisp (3), white (6)
foreach pair in "asian 1" "black 2" "hisp 3" "white 6" {
    gettoken name code : pair
    generate `name' = (ethnic == `code')
}

* Indicator equal to 1 when either aig_math or aig_read equals 1
generate aig = (aig_math == 1 | aig_read == 1)

* Rescale the covariates by 100 so the figures and regressions below are in percent
foreach covariate of varlist white black hisp asian eds lep swd repeating {
    replace `covariate' = 100 * `covariate'
}
**************************************************************
*Figure C.2
**************************************************************
* Pieces shared by every panel: the plotted sample and the trailing graph options
local rd_window "running_math>=-5 & running_math<=5 & Fsmathscal!=. & smathscal!=."
local x_axis `"xlabel(-5(1)5) xtitle("Distance to Retest Threshold (Math)") graphregion(color(white)) bgcolor(white)"'

* One binned scatter with linear fits on each side of the cutoff per covariate
cmogram sreadscal running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(-.9(.1)-.1) ytitle("Standardized English Score on Initial Test ({&sigma})") `x_axis')
cmogram white running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(30(5)50) ytitle("Percent White") `x_axis')
cmogram black running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(25(5)45) ytitle("Percent Black") `x_axis')
cmogram hisp running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(14(.5)16) ytitle("Percent Hispanic") `x_axis')
cmogram asian running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(1.1(.1)1.6) ytitle("Percent Asian") `x_axis')
cmogram eds running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(55(5)75) ytitle("Percent Economically Disadvantaged") `x_axis')
cmogram lep running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(7(2)13) ytitle("Percent Limited English Proficient") `x_axis')
cmogram swd running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(10(2)20) ytitle("Percent with a Disability") `x_axis')
cmogram repeating running_math if `rd_window', scatter histopts(width(0.02)) cutpoint(0) lfit ///
    graphopts(xline(0) ylabel(1(0.5)2.5) ytitle("Percent Repeating Current Grade") `x_axis')
**************************************************************
*Table version (not in paper)
**************************************************************
* Linear RD with each covariate as the outcome, within 5 units of the math threshold
foreach covariate of varlist sreadscal white black hisp asian eds lep swd repeating {
    reghdfe `covariate' treat running_math c.running_math#i.treat ///
        if inrange(running_math, -5, 5), absorb(dum) cluster(schoolid mastid)
}



**************************************************************
*This is the fail both test analysis reported in the text (footnote 21 describes the design)
**************************************************************
* ---- Reload the student panel and prepare the sample for the fail-both analysis ----
clear all
use "/data_analysis/NC_RD_Retake/student_mb_2009to2017.dta"

* Post-2012 rows are not needed, and grade 3 is removed because it has no lagged scores
drop if year > 2012 | grade == 3

* Both subjects need a current and a lagged score
drop if smathscal == . | lag_smathscal == . | sreadscal == . | lag_sreadscal == .

* Rows from the extend tests cannot be used; the extend flags are discarded afterwards
drop if nc_extend_math == 1 | nc_extend_read == 1
drop nc_extend_math nc_extend_math_retake nc_extend_read nc_extend_read_retake

* Treatment indicators: 1 when the subject's running variable is at or below zero
generate T_math = (running_math <= 0)
generate T_read = (running_read <= 0)
quietly compress

* Missing values of the two repetition covariates are set to zero
foreach flag of varlist repeat_next_year repeating {
    replace `flag' = 0 if `flag' == .
}

* Covariate lists for this analysis: demographics, and cubic polynomials in the lagged scores by grade
local demo_controls "i.sex i.lep i.swd i.ethnic i.eds i.aig_math i.aig_read i.grade i.year i.repeating i.repeat_next_year"
local test_controls "i.grade#c.lag_smathscal i.grade#c.lag_smathscal#c.lag_smathscal i.grade#c.lag_smathscal#c.lag_smathscal#c.lag_smathscal i.grade#c.lag_sreadscal i.grade#c.lag_sreadscal#c.lag_sreadscal i.grade#c.lag_sreadscal#c.lag_sreadscal#c.lag_sreadscal"

****** Double-treatment RDs ******
* double_treat equals 1 only when both the math and the reading treatment indicators equal 1
generate double_treat = T_math * T_read

* Terms common to every specification
local rd_terms "double_treat T_math T_read running_math running_read c.running_math#i.T_math c.running_read#i.T_read"

* For each outcome: first with grade and year indicators only, then with the full control sets.
* Both running variables are restricted to within 5 units of their thresholds.
foreach outcome in Fsmathscal Fsreadscal {
    reghdfe `outcome' `rd_terms' i.grade i.year ///
        if inrange(running_math, -5, 5) & inrange(running_read, -5, 5), absorb(schoolid) cluster(mastid schoolid)
    reghdfe `outcome' `rd_terms' `test_controls' `demo_controls' ///
        if inrange(running_math, -5, 5) & inrange(running_read, -5, 5), absorb(schoolid) cluster(mastid schoolid)
}








